*include config.do

/*
   Session setup for the leave-one-state-out exercise.

   From here on every command ends at a semicolon, not at a newline.
   That rule also applies to star comments: a comment opened with a
   leading star keeps going until the next semicolon, so each such
   comment below carries its own terminator.
*/
#delimit ;
clear ;
postutil clear ;
 
/* Run date as YYYY-MM-DD, used to stamp the log and the results file */
global date: di  %tdCY-N-D  daily("$S_DATE", "DMY") ;

/* graphdir is assigned twice and the later assignment wins, so the
   first line has no effect as written. Both are kept so that a
   collaborator can reorder them for a different machine. */
global graphdir "C:\Users\foote008\Documents\GitHub\footestange_biaspaper\paper\figures" ;
global graphdir "C:\Users\kstange\Documents\GitHub\footestange_biaspaper\paper\figures" ;
global datadir "C:\Users\kstange\Documents\GitHub\footestange_biaspaper\disclosed_output";

/* Close any log left open by an earlier run, then start a dated one */
cap log close ;
log using leave_one_out_$date.log, text replace;

*global othdate = "2023-08-25" ;

*insheet using $supportdir/bias_scatter_barrons_byyear_$othdate.csv, clear ;
/* The star comment below previously had no terminating semicolon, so it
   absorbed the version statement that follows and the version was never
   set. It is now terminated explicitly. */
*end ;

version 16 ;
set more off ;
set scheme s1mono ;
*include config.do ;



/* Read the disclosed CSV extract into memory, replacing whatever is loaded */
import delimited using "$datadir\20230920\bias_scatter_barrons_byyear_2023-09-08.csv", clear ;

/* Two-letter labels for the numeric state codes.
   NOTE(review): the codes look like state FIPS codes, confirm against the extract. */
label define statel
	8  "CO"
	13 "GA"
	27 "MN"
	29 "MO"
	36 "NY"
	39 "OH"
	42 "PA"
	48 "TX"
	49 "UT"
	51 "VA" ;
label values state statel ;

/* Cell counts by graduation year and state: first unweighted (rows in
   the file), then frequency-weighted by out_n, then by missing_n */
tabulate grad_year state ;

tabulate grad_year state [fweight = out_n] ;

tabulate grad_year state [fweight = missing_n] ;

/* Outcome used in every regression below: gap between the two estimates */
generate bias = beta_out - beta_in ;

/*
   Leave-one-state-out check.

   A results file is opened with one row per held-out state. Each row
   stores the held-out state code, the mean squared prediction error over
   that state, the missing_diff coefficient with its standard error and
   t ratio from the regression that excluded the state, and the state
   means of actual bias, predicted bias and missing_diff.
*/
postfile support excluded_state MSE beta se tstat true_bias predict_bias missing_diff
	using "$graphdir/leave_one_out_$date.dta", replace ;

levelsof state, local(statelist) ;
tabulate state ;

/*
   QUESTIONS
   - Do we include year or year_post
   - Do we weight or not weight
   - Do we include constant or noconstant? Why does this matter? Should
     the fixed effects not take care of this?
   - Do we include the state fixed effects or not?

   Alternatives that are currently switched off in both regressions
   below: the i.state fixed effects, the [weight=in_n] weights, and
   the nocons option.
*/

/* Benchmark: the same specification estimated on every state */
di "******************************************" ;
di "**************** All States  **************" ;
di "******************************************" ;
regress bias missing_diff i.year_post i.grad_year ;

foreach held in `statelist' { ;
	di "******************************************" ;
	di "**************** STATE `held' **************" ;
	di "******************************************" ;

	/* Fit on all states except the held-out one and keep the slope,
	   its standard error and their ratio */
	regress bias missing_diff i.year_post i.grad_year if state != `held' ;
	local b_miss = _b[missing_diff] ;
	local se_miss = _se[missing_diff] ;
	local t_miss = `b_miss'/`se_miss' ;

	/* Linear prediction for the held-out state only, so bias_pred is
	   missing everywhere else */
	predict bias_pred if state == `held', xb ;

	/* State-level means of actual bias, predicted bias and missing_diff */
	summarize bias if state == `held' ;
	local actual_mean = r(mean) ;
	summarize bias_pred if state == `held' ;
	local pred_mean = r(mean) ;
	summarize missing_diff if state == `held' ;
	local miss_mean = r(mean) ;

	/* Squared error is defined only where bias_pred exists, so the mean
	   taken below covers the held-out state alone */
	generate error_sq = (bias - bias_pred)^2 ;
	summarize error_sq ;

	post support (`held') (r(mean)) (`b_miss') (`se_miss') (`t_miss') (`actual_mean') (`pred_mean') (`miss_mean') ;

	/* Clear the scratch variables before the next pass */
	drop bias_pred error_sq ;
} ;

/* Flush and close the results file */
postclose support ;

/*
   Load the posted leave-one-out results, plot actual against predicted
   state-level bias, export the figure, and report the average of the
   per-state mean squared errors.

   The file paths are quoted, matching the quoted path used when the
   results file was written, so a directory name containing spaces does
   not split the filename.
*/
use "$graphdir/leave_one_out_$date.dta", clear ;

/* Value labels are dropped along with the old data, so define them
   again for the state code stored in the results */
label def statel 8 "CO" 36 "NY" 39 "OH" 42 "PA" 48 "TX" 13 "GA" 27 "MN" 29 "MO" 49 "UT" 51 "VA" ;
label val excluded_state statel;

/* One labelled point per held-out state, with a fitted line on top */
scatter true_bias predict_bias, mlabel(excluded_state)
	|| lfit true_bias predict_bias
	||, legend(off)
	title("Actual vs. Leave-One-Out Predicted State-level Bias")
	xtitle("Predicted bias based on rate of missingness")
	ytitle("Actual bias") ;

graph export "$graphdir/leave_one_out.wmf", replace ;

sum MSE, d;
/* Save the mean straight away so that the reported figure does not
   depend on later commands leaving the stored results untouched */
local avg_mse = r(mean) ;


list ; 
di "***** AVERAGE MEAN SQUARED ERROR:   " `avg_mse' ;
